/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */
package com.yyl.weibospider.gather.service;

import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Level;
import java.util.logging.Logger;

import com.yyl.weibospider.gather.dao.UserCountDao;
import com.yyl.weibospider.gather.domain.PersonBean;
import com.yyl.weibospider.gather.domain.UserCountBean;
import com.yyl.weibospider.gather.domain.WeiboBean;
import com.yyl.weibospider.gather.domain.WeiboTaskBean;
import com.yyl.weibospider.gather.domain.WeiboTaskBean.StopReson;
import com.yyl.weibospider.gather.operate.WeiboContextGet;
import com.yyl.weibospider.gather.operate.WeiboContextParse;
import com.yyl.weibospider.gather.operate.impl.WeiboContextGetImpl;
import com.yyl.weibospider.gather.operate.impl.WeiboContextParseImpl;
import com.yyl.weibospider.gather.util.GeneraDataQueue;
import com.yyl.weibospider.gather.util.WeiboTaskDataQueue;

/**
 * Drives the weibo crawl: logs in, turns persons into crawl tasks on a shared
 * queue (one producer thread) and lets {@code spiderNum} worker threads, all
 * running this same {@link Runnable}, fetch, parse and store each person's
 * weibos.
 * <p>
 * The task queue, the "producer finished" flag and the progress counters are
 * static, i.e. shared by every instance of this class.
 * 
 * @author ky
 */
public class WeiboSpiderServer implements Runnable {

	private static final Logger LOGGER = Logger
			.getLogger(WeiboSpiderServer.class.getName());

	/** Stop time used for persons whose last weibo update time is unknown. */
	private static final String DEFAULT_SPIDER_TIME = "2014-05-01 00:00";

	/**
	 * Pattern for weibo timestamps and time-based stop values. Uses the
	 * 24-hour field {@code HH}: with the 12-hour field {@code hh} an afternoon
	 * time is formatted without its AM/PM information and "12:xx" is parsed as
	 * "00:xx".
	 */
	private static final String TIME_PATTERN = "yyyy-MM-dd HH:mm";

	/** Prefix of the profile URL built from a person id. */
	private static final String MOBIL_USER_INFO_PREFIX = "http://weibo.com/";

	/** Number of persons requested from the person store per producer round. */
	private static final int PERSON_BATCH_SIZE = 100;

	/** A result page is fetched in this many consecutive parts. */
	private static final int PARTS_PER_PAGE = 3;

	/** Pause of an idle worker before it polls the task queue again. */
	private static final long IDLE_POLL_MILLIS = 200L;

	// Fetches html.
	private WeiboContextGet weiboGet;
	// Parses html.
	private WeiboContextParse weiboParse;

	// Supplies the account used to log in.
	private UserCountDao userCountDao;

	// Task queue; every task carries the person to crawl.
	// NOTE(review): static but re-assigned by each constructor call, so
	// creating a second instance replaces the queue used by the first one.
	private static GeneraDataQueue<WeiboTaskBean> taskQueue;

	// Set by the producer thread once no more tasks will be queued; volatile
	// so that the worker threads are guaranteed to see the update.
	private static volatile boolean getPersonOver = false;

	// Stores the fetched weibos.
	private WeiboServer weiboServer;

	// Stores the crawl state of persons.
	private PersonServer personServer;

	/**
	 * Number of crawl threads, 1 by default.
	 */
	private int spiderNum = 1;

	// Progress counters, updated concurrently by all worker threads.
	private static final AtomicInteger FETCHED_COUNT = new AtomicInteger();
	private static final AtomicInteger SAVED_COUNT = new AtomicInteger();
	private static final AtomicInteger PROCESSED_USER_COUNT = new AtomicInteger();
	private static final long START_TIME = System.currentTimeMillis();

	// Number of worker threads of this instance that have left their loop.
	private final AtomicInteger overThreadCount = new AtomicInteger();

	/**
	 * Creates the fetcher, parser, task queue and storage services. The
	 * account DAO is not created here; provide it with
	 * {@link #setUserCountDao(UserCountDao)} before calling
	 * {@link #startTask()}.
	 */
	public WeiboSpiderServer() {

		weiboGet = new WeiboContextGetImpl();
		weiboParse = new WeiboContextParseImpl();

		taskQueue = new WeiboTaskDataQueue();

		weiboServer = new WeiboServer();
		personServer = new PersonServer();

	}

	public int getSpiderNum() {
		return spiderNum;
	}

	public void setSpiderNum(int spiderNum) {
		this.spiderNum = spiderNum;
	}

	/**
	 * Sets the DAO that {@link #startTask()} reads the login account from.
	 * 
	 * @param userCountDao
	 *            account source
	 */
	public void setUserCountDao(UserCountDao userCountDao) {
		this.userCountDao = userCountDao;
	}

	/**
	 * Logs in through the html fetcher.
	 * 
	 * @param u
	 *            user name
	 * @param p
	 *            password
	 * @return the fetcher's login result, or {@code false} when the login
	 *         attempt threw (the exception is logged)
	 */
	public boolean login(String u, String p) {
		try {
			return this.weiboGet.login(u, p);
		} catch (Exception ex) {
			LOGGER.log(Level.SEVERE, null, ex);
		}

		return false;

	}

	/**
	 * Fetches and parses one part of one result page.
	 * <p>
	 * Arguments passed to the fetcher per part: part 1 uses
	 * {@code (userId, 0, page, 1)}, part 2 {@code (userId, page, page, 0)} and
	 * any other part {@code (userId, page, page, 1)}.
	 * 
	 * @throws Exception
	 *             whatever the fetcher or the parser throws
	 */
	private List<WeiboBean> fetchPart(String userId, int page, int part)
			throws Exception {

		String html;
		if (part == 1) {
			html = weiboGet.getWeiboPageByUserId(userId, 0, page, 1);
		} else if (part == 2) {
			html = weiboGet.getWeiboPageByUserId(userId, page, page, 0);
		} else {
			html = weiboGet.getWeiboPageByUserId(userId, page, page, 1);
		}

		return weiboParse.parse(html);
	}

	/**
	 * Returns the weibos of a single part of a page.
	 * 
	 * @param userId
	 *            id passed to the fetcher
	 * @param page
	 *            page number
	 * @param part
	 *            1 for the first part, 2 for the second, anything else for
	 *            the third
	 * @return the parsed weibos; empty when fetching or parsing failed (the
	 *         failure is logged), never {@code null}
	 */
	public List<WeiboBean> getByUserIdAndPart(String userId, int page, int part) {

		List<WeiboBean> weibos = new ArrayList<WeiboBean>();

		try {
			weibos.addAll(fetchPart(userId, page, part));
		} catch (Exception ex) {
			LOGGER.log(Level.SEVERE, null, ex);
		}

		return weibos;

	}

	/**
	 * Returns the weibos of a whole page by fetching its parts in order and
	 * stopping at the first empty part.
	 * 
	 * @param userId
	 *            id passed to the fetcher
	 * @param page
	 *            page number
	 * @return the weibos collected so far; on a failure the parts read before
	 *         it are still returned (the failure is logged), never
	 *         {@code null}
	 */
	public List<WeiboBean> getByUserId(String userId, int page) {

		List<WeiboBean> weibos = new ArrayList<WeiboBean>();
		try {

			for (int part = 1; part <= PARTS_PER_PAGE; part++) {
				List<WeiboBean> parsed = fetchPart(userId, page, part);
				if (parsed.isEmpty()) {
					break;
				}
				weibos.addAll(parsed);
			}

		} catch (Exception ex) {
			LOGGER.log(Level.SEVERE, null, ex);
		}

		return weibos;
	}

	/**
	 * Decides whether crawling a task should stop at the given weibo.
	 * 
	 * @param task
	 *            task providing the stop reason and stop value
	 * @param weibo
	 *            weibo currently being processed
	 * @return {@code true} when the stop condition is met; {@code false} when
	 *         there is no stop condition, the stop value is missing, or the
	 *         values involved cannot be parsed
	 */
	public boolean judgeStop(WeiboTaskBean task, WeiboBean weibo) {

		WeiboTaskBean.StopReson stopReson = task.getStopReson();
		// No stop condition.
		if (stopReson == WeiboTaskBean.StopReson.NoStop) {
			return false;
		}

		// Stop by id: stop once the current id is not greater than the stop
		// id.
		if (stopReson == WeiboTaskBean.StopReson.StopByID) {

			Object stopValue = task.getStopValue();
			// No stop id, e.g. the person has not been crawled before.
			if (stopValue == null) {
				return false;
			}

			try {
				long stopId = Long.parseLong(stopValue.toString());
				long currID = Long.parseLong(weibo.getWeiboID());
				return currID <= stopId;
			} catch (Exception ex) {
				// Best effort: an unusable id must not end the crawl.
				LOGGER.log(Level.FINE, "cannot compare weibo ids", ex);
				return false;
			}

		}

		// Stop by publish time.
		if (stopReson == WeiboTaskBean.StopReson.StopByTime) {

			Object stopValue = task.getStopValue();
			// No stop time: the person has never been crawled, so every weibo
			// is wanted.
			if (stopValue == null) {
				return false;
			}

			// SimpleDateFormat is not thread-safe, hence one instance per
			// call.
			SimpleDateFormat sdf = new SimpleDateFormat(TIME_PATTERN);
			try {
				Date parseStop = sdf.parse(stopValue.toString());
				Date parseCurr = sdf.parse(weibo.getWeiboTime());

				// Stop when the weibo is older than the stop time.
				return parseCurr.before(parseStop);

			} catch (Exception ex) {
				// Best effort: an unparsable time must not end the crawl.
				LOGGER.log(Level.FINE, "cannot compare weibo times", ex);
			}
		}

		return false;

	}

	/**
	 * Logs in with the account from the configured {@link UserCountDao}, then
	 * starts one producer thread that fills the task queue and
	 * {@code spiderNum} worker threads that run this instance. Returns without
	 * starting anything when no account is available or the login fails.
	 */
	public void startTask() {

		System.out.println("HHHHHHHHHHHHHHHHHH");

		if (userCountDao == null) {
			LOGGER.log(Level.SEVERE,
					"no UserCountDao configured, call setUserCountDao first");
			return;
		}

		// Logging in is mandatory. Logging in too often with the same
		// account may get it blocked.
		UserCountBean account = userCountDao.getUserCount();
		if (account == null) {
			LOGGER.log(Level.SEVERE, "no login account available");
			return;
		}

		boolean login = this.login(account.getUserName(),
				account.getUserPassword());

		if (login) {
			System.out.println("登陆成功！");
		} else {
			System.out.println("登陆失败！");
			return;
		}

		new Thread() {

			@Override
			public void run() {
				producePersonTasks();
			}

		}.start();

		for (int i = 0; i < this.spiderNum; i++) {

			new Thread(this).start();

		}

	}

	/**
	 * Producer loop: requests batches of persons until an empty batch comes
	 * back and queues one task per person that has a page id. The "producer
	 * finished" flag is set in a {@code finally} block so that the workers can
	 * terminate even when this loop dies with an exception.
	 */
	private void producePersonTasks() {

		try {
			while (true) {

				List<PersonBean> persons = personServer
						.getSomePersonByLastWeiboUpdateTimeToSpiser(PERSON_BATCH_SIZE);

				if (persons == null || persons.isEmpty()) {
					break;
				}

				for (PersonBean personBean : persons) {

					// No page id stored yet: try to obtain one.
					if (personBean.getPersonPageId() == null) {

						String personPageId = resolvePageId(personBean);

						// No page id available, skip this person.
						if (personPageId == null) {
							continue;
						}

						personBean.setPersonPageId(personPageId);
						personServer.save(personBean);
					}

					taskQueue.add(buildTask(personBean));
				}
			}
		} finally {
			getPersonOver = true;
		}

		System.out.println("get person is over!");
	}

	/**
	 * Tries to obtain the page id of a person from the person's profile page.
	 * 
	 * @return the parsed page id, or {@code null} when parsing threw or
	 *         yielded nothing
	 */
	private String resolvePageId(PersonBean personBean) {

		String profileUrl = MOBIL_USER_INFO_PREFIX + personBean.getPersonId();

		try {
			// NOTE(review): getHtml is currently a stub that returns "", so
			// the parser is handed an empty document.
			String html = getHtml(profileUrl);
			return weiboParse.parseUserPageId(html);
		} catch (Exception ex) {
			LOGGER.log(Level.FINE, "cannot resolve page id from " + profileUrl,
					ex);
			return null;
		}
	}

	/**
	 * Builds a stop-by-time task for a person. The stop value is the person's
	 * last weibo update time, or {@link #DEFAULT_SPIDER_TIME} when none is
	 * stored.
	 */
	private WeiboTaskBean buildTask(PersonBean personBean) {

		WeiboTaskBean wtb = new WeiboTaskBean();
		wtb.setStopReson(StopReson.StopByTime);

		if (personBean.getLastWeiboUpdateTime() != null) {
			wtb.setStopValue(new SimpleDateFormat(TIME_PATTERN)
					.format(personBean.getLastWeiboUpdateTime()));
		} else {
			wtb.setStopValue(DEFAULT_SPIDER_TIME);
		}

		wtb.setPerson(personBean);
		return wtb;
	}

	/**
	 * Crawls one task page by page, part by part. Every fetched weibo is
	 * stored; the first weibo seen updates the person's last weibo time and
	 * id. Returns when a part comes back empty or
	 * {@link #judgeStop(WeiboTaskBean, WeiboBean)} reports the stop condition.
	 * 
	 * @param taskBean
	 *            task to crawl; its current page is advanced as pages are
	 *            consumed
	 */
	public void doTask(WeiboTaskBean taskBean) {

		PROCESSED_USER_COUNT.incrementAndGet();
		// Id of the user to crawl.
		String spiderUid = taskBean.getPerson().getPersonPageId();

		boolean isFirst = true;
		while (true) {
			// Take the current page and advance the task to the next one.
			int spiderPage = taskBean.getCurrPage();
			taskBean.setCurrPage(spiderPage + 1);

			try {

				System.out.println("当前抓取id：" + spiderUid);

				long t1 = System.currentTimeMillis();

				for (int part = 1; part <= PARTS_PER_PAGE; part++) {

					List<WeiboBean> weiboList = getByUserIdAndPart(spiderUid,
							spiderPage, part);

					System.out.println("數目:" + weiboList.size() + " 抓取延时："
							+ (System.currentTimeMillis() - t1) / 1000);

					// An empty part ends the crawl of this task.
					if (weiboList.isEmpty()) {
						return;
					}

					FETCHED_COUNT.addAndGet(weiboList.size());

					for (WeiboBean w : weiboList) {

						// The first weibo seen is recorded as the person's
						// latest one.
						if (isFirst) {
							isFirst = false;
							recordLatestWeibo(taskBean, w);
						}

						saveWeibo(w);

						if (this.judgeStop(taskBean, w)) {
							return;
						}
					}
				}

			} catch (Exception ex) {
				LOGGER.log(Level.SEVERE, null, ex);
			}
		}

	}

	/**
	 * Copies time and id of the given weibo into the task's person. The time
	 * is set to {@code null} when it cannot be parsed.
	 */
	private void recordLatestWeibo(WeiboTaskBean taskBean, WeiboBean w) {

		PersonBean ps = taskBean.getPerson();

		Date date = null;
		try {
			date = new SimpleDateFormat(TIME_PATTERN).parse(w.getWeiboTime());
		} catch (Exception ex) {
			LOGGER.log(Level.WARNING,
					"cannot parse weibo time " + w.getWeiboTime(), ex);
		}

		ps.setLastWeiboUpdateTime(date);
		ps.setLastWeiboId(w.getWeiboID());
		taskBean.setPerson(ps);
	}

	/**
	 * Prints and stores one weibo, then prints the progress statistics. A
	 * failing save is logged and otherwise ignored so that the crawl goes on.
	 */
	private void saveWeibo(WeiboBean w) {

		try {
			System.out.println(w.getWeiboTime());
			System.out.println(w.getWeiboContext());
			System.out.println("=====================");
			weiboServer.save(w);
			SAVED_COUNT.incrementAndGet();

		} catch (Exception ex) {
			// Logged at FINE only: this runs once per weibo.
			LOGGER.log(Level.FINE, "cannot save weibo", ex);
		} finally {
			printProgress();
		}
	}

	/** Prints elapsed time, processed users and fetch/save throughput. */
	private static void printProgress() {

		long costTime = (System.currentTimeMillis() - START_TIME) / 1000;
		// Rates are computed over at least one second to avoid dividing by
		// zero right after start.
		long rateSeconds = Math.max(costTime, 1L);

		int fetched = FETCHED_COUNT.get();
		int saved = SAVED_COUNT.get();

		System.out.println("时间:" + formatElapsed(costTime) + " 用户数目："
				+ PROCESSED_USER_COUNT.get());

		System.out.println("获取：" + fetched + " 速度："
				+ (fetched * 1f / rateSeconds * 60) + " /min");
		System.out.println("保存:" + saved + " 速度："
				+ (saved * 1f / rateSeconds * 60) + " /min");
	}

	/** Formats a duration in seconds as hours/minutes/seconds text. */
	private static String formatElapsed(long totalSeconds) {

		if (totalSeconds < 60) {
			return totalSeconds + "秒";
		}

		long minutes = totalSeconds / 60;
		long seconds = totalSeconds % 60;

		if (minutes < 60) {
			return minutes + "分" + seconds + "秒";
		}

		return (minutes / 60) + "时" + (minutes % 60) + "分" + seconds + "秒";
	}

	/**
	 * Worker loop: takes tasks from the shared queue until the producer has
	 * finished and the queue is empty. The last worker of this instance to
	 * finish resets the spider status of the persons.
	 */
	@Override
	public void run() {

		String tname = Thread.currentThread().getName();

		while (true) {

			System.out.println(tname + " will get task");

			WeiboTaskBean task;

			synchronized (taskQueue) {

				if (getPersonOver && taskQueue.isEmpty()) {
					break;
				}

				task = taskQueue.poll();

			}

			if (task == null) {
				// Queue is empty but the producer is still working: wait a
				// moment instead of spinning.
				try {
					Thread.sleep(IDLE_POLL_MILLIS);
				} catch (InterruptedException ex) {
					Thread.currentThread().interrupt();
					break;
				}
				continue;
			}

			System.out.println(tname + " get task!");

			try {
				processTask(task);
			} catch (RuntimeException ex) {
				// A failing task must not end this worker, otherwise the
				// final status update below could never be reached.
				LOGGER.log(Level.SEVERE, "task failed", ex);
			}

		}

		// Once every worker has finished, reset the status table.
		if (overThreadCount.incrementAndGet() == this.spiderNum) {

			this.personServer.updatePersonSpiderStatus(
					PersonBean.STU_WAIT_SPIDER, PersonBean.STU_BEEN_SPIDER);

			this.personServer.updatePersonSpiderStatus(
					PersonBean.STU_WAIT_SPIDER, PersonBean.STU_IN_SPIDER);

		}

		System.out.println(Thread.currentThread().getName() + "结束抓取任务");

	}

	/** Crawls one task and stores the updated crawl state of its person. */
	private void processTask(WeiboTaskBean task) {

		this.doTask(task);
		PersonBean person = task.getPerson();
		person.setLastSpiderTime(new Date());

		// No weibo time was recorded for this person: use the current time.
		if (person.getLastWeiboUpdateTime() == null) {
			person.setLastWeiboUpdateTime(new Date());
		}

		if (person.getSpiderTimes() == null) {
			person.setSpiderTimes(1);
		} else {
			person.setSpiderTimes(person.getSpiderTimes() + 1);
		}

		person.setStatus(PersonBean.STU_BEEN_SPIDER);
		personServer.save(person);
	}

	/**
	 * Manual check: logs in and prints an (currently empty) html string.
	 */
	public void te() {

		// NOTE(review): hard-coded credentials; move them to configuration.
		boolean login = this.login("kyyblabla@163.com", "519519519");

		if (login) {
			System.out.println("登陆成功！");
		} else {
			System.out.println("登陆失败！");
			return;
		}

		// The page fetch that used to fill this value is disabled.
		String html = "";
		System.out.println(html);

	}

	/**
	 * Manual check: prints the persons returned by
	 * {@code getSomePersonByLastSpiderTime(0, 10)}.
	 */
	public void te2() {

		System.out.println("========================");
		List<PersonBean> somePersonByLastSpiderTime = personServer
				.getSomePersonByLastSpiderTime(0, 10);

		for (PersonBean personBean : somePersonByLastSpiderTime) {

			System.out.println(personBean);

		}

		System.out.println("========================");
	}

	/**
	 * Stub: the actual fetch is not implemented.
	 * 
	 * @param url
	 *            ignored
	 * @return always the empty string
	 */
	public String getHtml(String url) {

		return "";

	}

	/**
	 * Manual check: logs in and prints the first page of one user's weibos.
	 * 
	 * @param args
	 *            optional user name and password; the built-in values are
	 *            used for whichever is missing
	 */
	public static void main(String[] args) {

		WeiboSpiderServer w = new WeiboSpiderServer();

		// NOTE(review): hard-coded fallback credentials; prefer passing them
		// on the command line.
		String user = (args != null && args.length > 0) ? args[0]
				: "18203690957";
		String password = (args != null && args.length > 1) ? args[1]
				: "yanahui90";

		w.login(user, password);

		System.out.println("=================");

		List<WeiboBean> bs = w.getByUserId("1005051164552632", 1);

		for (WeiboBean weiboBean : bs) {

			System.out.println(weiboBean.getWeiboID());
			System.out.println(weiboBean.getWeiboContext());
			System.out.println("------------------");
		}

		System.out.println("=================");
	}

}
